# This script is part 4 of the pipeline. It:
# 1. imports data after qc filtering, sensor self heating correction and gap filling
# 2. imports the same data as in 1. but without drift correction and imports dyn_metadata file
# 3. generates table and plot of data availability
# 4. generates plot of Sensor Self Heating Correction
# 5. generates plot of CO2 and H2O mixing ratio before and after drift correction
# 6. calculates and plots uncertainty of gap filling results
# 7. plots wind-rose and TKE-rose
# 8. filters data with cold temperatures and plots NEE during snow cover
# 9. plots monthly diurnal course of air and soil temperature
# 
# Felix Nieberding
# 2020-08-26


# Clear the global workspace and make UTC the session time zone.
rm(list = ls())
Sys.setenv(TZ = "UTC")


library(tidyverse)
library(lubridate)
library(data.table)
library(extrafont)
library(ggpubr)
library(gridExtra)
library(ggpmisc)
library(pracma)
library(lognorm)
library(REddyProc)
library(grid)
library(cowplot)
library(xtable)
library(cmocean)
library(openair)
library(cowplot)

# Relative input/output paths used below resolve against this directory.
setwd(dir = "~/TransTiP/_NamCo_sync/3_data_qa-qc/Flux data/ESSD_scripts_review/")

# labeller
# Named vector: month number (as character) -> English month name.
month_name <- setNames(month.name, as.character(1:12))


# data import -------------------------------------------------------------
# import after drift correction
df_SSH <- fread(
  "df_WLG_openeddy_SSH.csv",
  na.strings = c("-9999", "-9999.0")
) %>%
  mutate(DATETIME = as.POSIXct(DATETIME))

df_GF <- fread(
  "df_WLG_openeddy_SSH_gapfill.csv",
  na.strings = c("-9999", "-9999.0")
) %>%
  mutate(DATETIME = as.POSIXct(DATETIME))

# import before drift correction
df_ORG <- fread(
  "df_ORG_openeddy_SSH.csv",
  na.strings = c("-9999", "-9999.0")
) %>%
  mutate(DATETIME = as.POSIXct(DATETIME))

# import meteo data
df_meteo <- fread(
  "~/TransTiP/_NamCo_sync/2_data_processed/Met data/NAMORS_PBL_2005-2019/NAMORS_metdata_2005-2019_200702.csv",
  na.strings = "-9999"
) %>%
  mutate(DATETIME = ymd_hms(DATETIME))

# import dynamic metadata
# The time stamp is assembled from the separate date and time columns.
df_dyn <- fread(
  "~/TransTiP/_NamCo_sync/3_data_qa-qc/Flux data/drift correction/dynamic_metadata_WLG_200715.txt",
  na.strings = c("-9999", "-9999.0")
) %>%
  mutate(DATETIME = as.POSIXct(paste(date, time), format = "%Y-%m-%d %H:%M"))

# join files
# df: filtered + gap-filled fluxes (all but the identifier/time columns cast
# to numeric), then the meteo data.
df <- df_SSH %>%
  left_join(df_GF, by = "DATETIME") %>%
  mutate_at(.vars = vars(c(-"filename", -"date", -"time", -"DATETIME")), as.numeric) %>%
  left_join(df_meteo, by = "DATETIME")

# df_comp: data before (_ORG) and after (_WLG) drift correction side by side,
# plus the dynamic metadata.
df_comp <- df_ORG %>%
  left_join(df_SSH, suffix = c("_ORG", "_WLG"), by = "DATETIME") %>%
  left_join(df_dyn, by = "DATETIME")


# data availability -------------------------------------------------------
# table
# Per-year percentage of half-hourly records that are non-missing for the raw
# fluxes, after each listed QC step (records flagged 2 are set to NA), and for
# the gap-filled NEE.
df_avail <- df %>%
  mutate(NEE_fetch = ifelse(qc_co2_flux == 2 | qc_wind_dir == 2, NA, co2_flux),
         NEE_ustar = ifelse(qc_co2_flux == 2 | qc_wind_dir == 2 | qc_ustar == 2, NA, co2_flux),
         NEE_prelim = ifelse(qc_NEE_prelim == 2, NA, co2_flux),
         NEE_despike = ifelse(qc_NEE_composite == 2, NA, co2_flux),
         h2o_flux_fetch = ifelse(qc_h2o_flux == 2 | qc_wind_dir == 2, NA, h2o_flux),
         h2o_flux_ustar = ifelse(qc_h2o_flux == 2 | qc_wind_dir == 2 | qc_ustar == 2, NA, h2o_flux),
         h2o_flux_prelim = ifelse(qc_h2o_prelim == 2, NA, h2o_flux),
         h2o_flux_despike = ifelse(qc_h2o_composite == 2, NA, h2o_flux)) %>%
  select(DATETIME, co2_flux, NEE_fetch, NEE_ustar, NEE_prelim, NEE_despike, NEE_f, h2o_flux, h2o_flux_fetch, h2o_flux_ustar, h2o_flux_prelim, h2o_flux_despike) %>%
  mutate_at(.vars = vars(-"DATETIME"), as.numeric) %>%
  # join onto the complete half-hourly sequence so that every year 2005-2019
  # is present as a group
  left_join(tibble(DATETIME = seq.POSIXt(from = as.POSIXct("2005-01-01 00:00"), to = as.POSIXct("2019-12-31 23:30"), by = 1800)),. , by = "DATETIME") %>%
  group_by(year(DATETIME)) %>%
  summarise_at(vars(co2_flux, NEE_fetch, NEE_ustar, NEE_despike, NEE_f, h2o_flux, h2o_flux_fetch, h2o_flux_ustar, h2o_flux_despike), ~(sum(!is.na(.)))) %>%
  # possible half-hourly records per year: 48 per day, 366 days in leap years.
  # Derived from the year instead of a hard-coded vector, so it cannot get out
  # of step with the years actually present.
  mutate(total = 48 * ifelse(leap_year(`year(DATETIME)`), 366, 365)) %>%
  mutate_at(vars(co2_flux, NEE_fetch, NEE_ustar, NEE_despike, NEE_f, h2o_flux, h2o_flux_fetch, h2o_flux_ustar, h2o_flux_despike), ~. / total * 100) %>% 
  mutate_at(vars(co2_flux, NEE_fetch, NEE_ustar, NEE_despike, NEE_f, h2o_flux, h2o_flux_fetch, h2o_flux_ustar, h2o_flux_despike), ~round(.,digits = 1)) 

# Same percentages over all rows of df at once (relative to the number of rows
# in df); not part of the exported table below.
df_total <- df %>%
  mutate(NEE_fetch = ifelse(qc_co2_flux == 2 | qc_wind_dir == 2, NA, co2_flux),
         NEE_ustar = ifelse(qc_co2_flux == 2 | qc_wind_dir == 2 | qc_ustar == 2, NA, co2_flux),
         NEE_prelim = ifelse(qc_NEE_prelim == 2, NA, co2_flux),
         NEE_despike = ifelse(qc_NEE_composite == 2, NA, co2_flux),
         h2o_flux_fetch = ifelse(qc_h2o_flux == 2 | qc_wind_dir == 2, NA, h2o_flux),
         h2o_flux_ustar = ifelse(qc_h2o_flux == 2 | qc_wind_dir == 2 | qc_ustar == 2, NA, h2o_flux),
         h2o_flux_prelim = ifelse(qc_h2o_prelim == 2, NA, h2o_flux),
         h2o_flux_despike = ifelse(qc_h2o_composite == 2, NA, h2o_flux)) %>%
  select(DATETIME, co2_flux, NEE_fetch, NEE_ustar, NEE_prelim, NEE_despike, NEE_f, h2o_flux, h2o_flux_fetch, h2o_flux_ustar, h2o_flux_prelim, h2o_flux_despike) %>%
  mutate_at(.vars = vars(-"DATETIME"), as.numeric) %>% 
  summarise_at(vars(co2_flux, NEE_fetch, NEE_ustar, NEE_despike, NEE_f, h2o_flux, h2o_flux_fetch, h2o_flux_ustar, h2o_flux_despike), ~(sum(!is.na(.))/length(.)*100))
  
# Values are converted to character so the table prints them as rounded above.
df_avail <- as.data.frame(df_avail %>% select(-total) %>% mutate_all(as.character))

# `type` is an argument of print.xtable(), not of xtable().
print(xtable(df_avail), type = "latex", file = "dataAvailability_WLG.tex", include.rownames = FALSE)

# plot
# One panel per year: original half-hourly NEE and the values that exist only
# after gap filling, against the fractional day of year.
plot_da <- 
  df %>%
  select(DATETIME, NEE_orig, NEE_f) %>% 
  mutate(
    # Day of year plus the elapsed fraction of that day. Computed from the
    # hour/minute components rather than by slicing as.character(DATETIME):
    # the string form of midnight stamps may carry no time part (giving NA),
    # and HHMM / 2400 is not a true fraction of a day.
    DOY_TIME = yday(DATETIME) +
      (lubridate::hour(DATETIME) + lubridate::minute(DATETIME) / 60) / 24,
    # keep NEE_f only where it fills a gap in NEE_orig
    NEE_f = ifelse(is.na(NEE_orig) & !is.na(NEE_f), NEE_f, NA)) %>% 
  pivot_longer( names_to = "series", values_to = "flux", cols = starts_with("NEE")) %>%
  # level order interleaves 2005.. and 2013.. so each of the two facet columns
  # holds consecutive years
  mutate(Year = factor(year(DATETIME), levels = c("2005", "2013", "2006", "2014", "2007", "2015", "2008","2016","2009","2017","2010","2018","2011","2019","2012"))) %>%
  ggplot(aes(DOY_TIME, flux, color = series, group = year(DATETIME))) +
  geom_point(na.rm = TRUE, size = .4, alpha = 1/10) +
  scale_y_continuous(limits = c(-8, 8), breaks = c(-5, 0, 5)) +
  facet_wrap(~Year, ncol = 2, strip.position = "right" ) +
  labs(y = expression('CO'[2]*' flux ('*mu*'mol m'^-2*' s'^-1*')')) +
  # legend keys are drawn opaque and larger than the plotted points
  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  scale_color_manual(values = c("grey60", "black"), labels = c("Gap-filled", "Original")) +
  # breaks are the first day of year of each month in a non-leap year
  scale_x_continuous(breaks = c(1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335), 
                     labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
                     expand = c(.005, .001)) +
  theme_cleveland() +
  theme(legend.position= "top", legend.title = element_blank(), legend.key = element_blank(),
        panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA), panel.spacing = unit(0.2, "cm"),
        strip.background = element_rect(colour = "black", fill = NA), 
        axis.title.x = element_blank(), axis.title.y = element_text(angle = 90),
        text = element_text(family = "Calibri", size = 11))


# save at print resolution (manuscript folder and working directory) and as a
# low-resolution preview
ggsave(filename = "~/TransTiP/_NamCo_sync/4_Texte/Alpine Steppe_ESSD/dataAvailability_WLG.png", plot_da, width = 18, height = 14, units = "cm", dpi = 600)
ggsave(filename = "dataAvailability_WLG.png", plot_da, width = 18, height = 14, units = "cm", dpi = 600)
ggsave(filename = "dataAvailability_96dpi_WLG.png", plot_da, width = 18, height = 14, units = "cm", dpi = 96)

# differences between HC  -----------------------------------------------------
# Month labels drawn inside the facets of the monthly plots (joined to the
# facets via MONTH). The plots set the label position themselves, so x and y
# are not read there.
df_labels <- data.frame(x = rep(3, times = 12), 
                        y = rep(-2, times = 12),
                        MONTH = c(1:12),
                        month_label = factor(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
                                             levels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")))

# Mean diurnal course per month of the QC-filtered flux (F_CO2) and the two
# F_CO2_HC_* series, in long format.
df_SSH_corr_monthly <- df %>%
  mutate(F_CO2 = ifelse(qc_NEE_composite == 2, NA, co2_flux),
         # decimal hour of day, taken from the time components; slicing
         # as.character(DATETIME) can yield NA for midnight stamps whose
         # string form has no time part
         TIME_DEC = lubridate::hour(DATETIME) + lubridate::minute(DATETIME) / 60) %>%
  group_by(TIME_DEC, month(DATETIME)) %>%
  summarise_at(vars(F_CO2, F_CO2_HC_Burba, F_CO2_HC_Frank), mean, na.rm = TRUE) %>%
  pivot_longer(cols = c(F_CO2, F_CO2_HC_Burba, F_CO2_HC_Frank), names_to = "names", values_to = "values") %>%
  rename(MONTH = `month(DATETIME)`) 

# Monthly panels (4 per row) of the mean diurnal course, one line per series.
# Facet strips are hidden; the month is shown by an in-panel label instead.
plot_SSH_corr_monthly <- ggplot(
  df_SSH_corr_monthly,
  aes(TIME_DEC, values, color = names)
) +
  geom_hline(yintercept = 0, color = "grey") +
  geom_line() +
  facet_wrap(~MONTH, ncol = 4) +
  geom_label(
    data = df_labels,
    aes(x = 1.8, y = -3.2, label = month_label),
    color = "black"
  ) +
  scale_color_manual(
    labels = c("Uncorrected", "Burba et al. (2008)", "Frank & Massman (2020)", ""),
    values = c("black", "firebrick", "dodgerblue", "black")
  ) +
  scale_x_continuous(
    breaks = seq(0, 24, by = 6),
    labels = seq(0, 24, by = 6),
    expand = c(.025, .01)
  ) +
  labs(
    x = 'Hour of the day',
    y = expression('Mean CO'[2]*' flux ['*mu*'mol m'^-2*' s'^-1*']')
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    legend.key = element_blank(),
    legend.background = element_rect(fill = "white"),
    legend.title = element_blank(),
    panel.grid.major.y = element_line(color = "grey70", linetype = "dashed"),
    panel.background = element_rect(colour = "black", fill = NA),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA),
    axis.title.y = element_blank(),
    strip.background = element_blank(),
    strip.text = element_blank()
  )


# Mean flux per day of year (averaged over all rows of df sharing that day)
# for the same three series; drawn without a legend.
plot_SSH_corr_annual <- df %>%
  mutate(
    F_CO2 = ifelse(qc_NEE_composite == 2, NA, co2_flux),
    DOY = yday(DATETIME)
  ) %>%
  group_by(DOY) %>%
  summarise_at(vars(F_CO2, F_CO2_HC_Burba, F_CO2_HC_Frank), mean, na.rm = TRUE) %>%
  pivot_longer(
    cols = c(F_CO2, F_CO2_HC_Burba, F_CO2_HC_Frank),
    names_to = "names",
    values_to = "values"
  ) %>%
  ggplot(aes(DOY, values, color = names)) +
  geom_hline(yintercept = 0, color = "grey") +
  geom_line() +
  scale_color_manual(
    labels = c("Uncorrected", "Burba et al. (2008)", "Frank & Massman (2020)"),
    values = c("black", "firebrick", "dodgerblue")
  ) +
  scale_x_continuous(
    breaks = c(1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335),
    labels = month.abb,
    expand = c(.005, .01)
  ) +
  labs(
    x = 'Month',
    y = expression('Mean CO'[2]*' flux ['*mu*'mol m'^-2*' s'^-1*']')
  ) +
  theme_light() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.background = element_rect(fill = "white"),
    legend.spacing.y = unit(0, "cm"),
    panel.grid.major.y = element_line(color = "grey70", linetype = "dashed"),
    panel.background = element_rect(colour = "black", fill = NA),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA),
    axis.title.y = element_blank()
  )

# build plot
# Monthly panels on top (two thirds of the height), annual course below.
plot_combo <- plot_grid(
  plot_SSH_corr_monthly,
  plot_SSH_corr_annual,
  nrow = 2,
  rel_heights = c(2, 1)
)

# Shared, rotated y-axis title placed to the left of both panels.
y.grob <- textGrob(
  expression('CO'[2]*' flux ('*mu*'mol m'^-2*' s'^-1*')'),
  gp = gpar(fontsize = 11),
  rot = 90
)

plot_combo_2 <- grid.arrange(arrangeGrob(plot_combo, left = y.grob))

# save plot
ggsave(
  filename = "SSH_corr.png",
  plot = plot_combo_2,
  width = 18, height = 14, units = "cm", dpi = 600
)
ggsave(
  filename = "~/TransTiP/_NamCo_sync/4_Texte/Alpine Steppe_ESSD/SSH_corr.png",
  plot = plot_combo_2,
  width = 18, height = 14, units = "cm", dpi = 600
)


# flux uncertainty --------------------------------------------------------
# MDS uncertainty ---------------------------------------------------------
# Median, IQR and standard deviation of the flux and uncertainty columns for
# records with NEE_fmeth == 1, NEE_fwin <= 14 and a non-missing NEE_orig
# (printed for inspection).
df %>%
  filter(
    NEE_fmeth == 1,
    NEE_fwin <= 14,
    !is.na(NEE_orig)
  ) %>%
  select(NEE_orig, NEE_fall, rand_err_co2_flux, NEE_fsd) %>%
  summarise_all(
    .funs = c(
      ~median(., na.rm = TRUE),
      ~IQR(., na.rm = TRUE),
      ~sd(., na.rm = TRUE)
    )
  )

# Overlaid histograms of the two uncertainty estimates for the same records.
plot_uncert_RE_MDS <- df %>%
  select(DATETIME, rand_err_co2_flux, NEE_orig, NEE_fall, NEE_fsd, NEE_fmeth, NEE_fwin) %>%
  filter(
    NEE_fmeth == 1,
    NEE_fwin <= 14,
    !is.na(NEE_orig)
  ) %>%
  pivot_longer(
    cols = c(rand_err_co2_flux, NEE_fsd),
    names_to = "names",
    values_to = "values"
  ) %>%
  ggplot(aes(x = values, fill = names)) +
  geom_histogram(position = "identity", binwidth = 0.02, na.rm = TRUE, alpha = 0.4) +
  scale_x_continuous(limits = c(0, 3)) +
  scale_fill_manual(
    values = c("firebrick", "dodgerblue"),
    labels = c("MDS", "RE")
  ) +
  labs(
    x = expression('NEE uncertainty ('*mu*'mol m'^-2*'s'^-1*')'),
    y = "Number of observations (#)"
  ) +
  theme_cowplot() +
  theme(
    legend.position = c(.7, .8),
    legend.title = element_blank()
  )

# The working-directory copy is written at the size of the current device.
ggsave(
  filename = "CO2_uncertainty_RE_MDS_WLG.png",
  plot = plot_uncert_RE_MDS,
  dpi = 300
)
ggsave(
  filename = "~/TransTiP/_NamCo_sync/4_Texte/Alpine Steppe_ESSD/CO2_uncertainty_RE_MDS_WLG.png",
  plot = plot_uncert_RE_MDS,
  width = 8.3, height = 5, units = "cm", dpi = 600
)


# plot monthly and annual uncertainty -------------------------------------
# Mean diurnal course per month of the QC-filtered flux and of both
# uncertainty estimates, restricted to records with NEE_fmeth == 1,
# NEE_fwin <= 14 and a non-missing NEE_orig.
df_uncert_monthly <- df %>%
  mutate(F_CO2 = ifelse(qc_NEE_composite == 2, NA, co2_flux),
         # decimal hour of day, taken from the time components; slicing
         # as.character(DATETIME) can yield NA for midnight stamps whose
         # string form has no time part
         TIME_DEC = lubridate::hour(DATETIME) + lubridate::minute(DATETIME) / 60) %>%
  filter(NEE_fmeth == 1 & NEE_fwin <= 14 & !is.na(NEE_orig)) %>%
  group_by(TIME_DEC, month(DATETIME)) %>%
  summarise_at(vars(F_CO2, rand_err_co2_flux, NEE_fsd), mean, na.rm = TRUE) %>%
  rename(MONTH = `month(DATETIME)`) 

# Monthly panels: mean flux as a line with two ribbons of +/- the mean
# NEE_fsd and +/- the mean rand_err_co2_flux around it.
plot_uncert_monthly <- ggplot(df_uncert_monthly, aes(TIME_DEC)) +
  geom_hline(yintercept = 0, color = "grey") +
  geom_ribbon(
    aes(ymin = F_CO2 - NEE_fsd, ymax = F_CO2 + NEE_fsd, fill = "NEE_fsd")
  ) +
  geom_ribbon(
    aes(ymin = F_CO2 - rand_err_co2_flux, ymax = F_CO2 + rand_err_co2_flux, fill = "RE")
  ) +
  geom_line(aes(y = F_CO2, color = "F_CO2")) +
  facet_wrap(~MONTH, ncol = 4) +
  geom_label(
    data = df_labels,
    aes(x = 1.8, y = -4.2, label = month_label),
    color = "black"
  ) +
  scale_fill_manual(
    labels = c("NEE_fsd", "RE"),
    values = c(alpha("firebrick", .4), alpha("dodgerblue", .4))
  ) +
  scale_color_manual(
    values = "black",
    labels = expression('CO'[2]*' flux')
  ) +
  scale_x_continuous(
    breaks = seq(0, 24, by = 6),
    labels = seq(0, 24, by = 6),
    expand = c(.025, .01)
  ) +
  labs(
    x = 'Hour of the day',
    y = expression('Mean CO'[2]*' flux ['*mu*'mol m'^-2*' s'^-1*']')
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    legend.key = element_blank(),
    legend.background = element_rect(fill = "white"),
    legend.title = element_blank(),
    panel.grid.major.y = element_line(color = "grey70", linetype = "dashed"),
    panel.background = element_rect(colour = "black", fill = NA),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA),
    axis.title.y = element_blank(),
    strip.background = element_blank(),
    strip.text = element_blank()
  )



# Same line-plus-ribbons display per day of year; drawn without a legend.
plot_uncert_annual <- df %>%
  mutate(
    F_CO2 = ifelse(qc_NEE_composite == 2, NA, co2_flux),
    DOY = yday(DATETIME)
  ) %>%
  filter(
    NEE_fmeth == 1,
    NEE_fwin <= 14,
    !is.na(NEE_orig)
  ) %>%
  group_by(DOY) %>%
  summarise_at(vars(F_CO2, rand_err_co2_flux, NEE_fsd), mean, na.rm = TRUE) %>%
  ggplot(aes(DOY)) +
  geom_hline(yintercept = 0, color = "grey") +
  geom_ribbon(
    aes(ymin = F_CO2 - NEE_fsd, ymax = F_CO2 + NEE_fsd, fill = "NEE_fsd")
  ) +
  geom_ribbon(
    aes(ymin = F_CO2 - rand_err_co2_flux, ymax = F_CO2 + rand_err_co2_flux, fill = "RE")
  ) +
  geom_line(aes(y = F_CO2, color = "F_CO2")) +
  scale_fill_manual(
    labels = c("NEE_fsd", "RE"),
    values = c(alpha("firebrick", .4), alpha("dodgerblue", .4))
  ) +
  scale_color_manual(
    values = "black",
    labels = expression('CO'[2]*' flux')
  ) +
  scale_x_continuous(
    breaks = c(1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335),
    labels = month.abb,
    expand = c(.005, .01)
  ) +
  labs(
    x = 'Month',
    y = expression('Mean CO'[2]*' flux ['*mu*'mol m'^-2*' s'^-1*']')
  ) +
  theme_light() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.background = element_rect(fill = "white"),
    legend.spacing.y = unit(0, "cm"),
    panel.grid.major.y = element_line(color = "grey70", linetype = "dashed"),
    panel.background = element_rect(colour = "black", fill = NA),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA),
    axis.title.y = element_blank()
  )


# build plot
# Monthly panels on top (two thirds of the height), annual course below.
# Note: plot_combo, y.grob and plot_combo_2 are reassigned here.
plot_combo <- plot_grid(
  plot_uncert_monthly,
  plot_uncert_annual,
  nrow = 2,
  rel_heights = c(2, 1)
)

# Shared, rotated y-axis title placed to the left of both panels.
y.grob <- textGrob(
  expression('CO'[2]*' flux ('*mu*'mol m'^-2*' s'^-1*')'),
  gp = gpar(fontsize = 11),
  rot = 90
)

plot_combo_2 <- grid.arrange(arrangeGrob(plot_combo, left = y.grob))

# save plot
ggsave(
  filename = "Flux_uncertainty.png",
  plot = plot_combo_2,
  width = 18, height = 14, units = "cm", dpi = 600
)
ggsave(
  filename = "~/TransTiP/_NamCo_sync/4_Texte/Alpine Steppe_ESSD/Flux_uncertainty.png",
  plot = plot_combo_2,
  width = 18, height = 14, units = "cm", dpi = 600
)


# CO2 drift correction --------------------------------------------------------
# Upper panel: mixing ratio before drift correction. Records with composite
# QC flag 2 are blanked; co2_conc is the reference plus the offset.
p_BDC <- df_comp %>%
  mutate(
    co2_conc = ifelse(
      as.numeric(qc_co2_mixing_ratio_composite_ORG) == 2,
      NA,
      co2_ref + co2_offset
    ),
    co2_mixing_ratio_ORG = ifelse(
      as.numeric(qc_co2_mixing_ratio_composite_ORG) == 2,
      NA,
      as.numeric(co2_mixing_ratio_ORG)
    )
  ) %>%
  ggplot(aes(x = DATETIME)) +
  geom_point(aes(y = co2_mixing_ratio_ORG, color = "co2_mixing_ratio_ORG"), na.rm = TRUE, size = .5) +
  geom_point(aes(y = co2_conc, color = "co2_conc"), na.rm = TRUE, size = .5) +
  geom_line(aes(y = co2_ref, color = "WLG model"), na.rm = TRUE, size = 1) +
  # red open circles with text labels mark the annotated events
  annotate(
    "pointrange",
    x = as.POSIXct("2009-06-30 00:00"), y = 390, ymin = 400, ymax = 400,
    colour = "red", size = 1.5, shape = "circle open"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2009-06-30 00:00"), y = 470, label = "user calibration"
  ) +
  annotate(
    "pointrange",
    x = as.POSIXct("2012-04-26 00:00"), y = 400, ymin = 390, ymax = 390,
    colour = "red", size = 1.5, shape = "circle open"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2012-04-26 00:00"), y = 480, label = "user calibration"
  ) +
  annotate(
    "pointrange",
    x = as.POSIXct("2017-06-13 00:00"), y = 388, ymin = 390, ymax = 390,
    colour = "red", size = 2, shape = "circle open"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2016-05-10 00:00"), y = 450, label = "new sensor"
  ) +
  annotate(
    "pointrange",
    x = as.POSIXct("2019-05-15 00:00"), y = 415, ymin = 410, ymax = 410,
    colour = "red", size = 1.5, shape = "circle open"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2018-06-15 00:00"), y = 490, label = "user calibration"
  ) +
  scale_y_continuous(limits = c(-50, 500)) +
  scale_x_datetime(date_breaks = "1 year", date_labels = "%Y", expand = c(.01, .01)) +
  scale_color_manual(
    values = c("#1f78b4", "#8fbbd9", alpha("#872f2f", alpha = .6)),
    labels = c("daily median", "30-min", "reference")
  ) +
  labs(
    title = "Before Drift Correction",
    y = expression('CO'[2]*' mixing ratio ('*mu*'mol mol'^-1*')')
  ) +
  theme_light() +
  theme(
    legend.position = c(.85, .27),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.box.background = element_blank(),
    legend.background = element_blank(),
    axis.title.x = element_blank(),
    text = element_text(size = 11),
    panel.background = element_rect(colour = "black", fill = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(linetype = "dashed", color = "grey70"),
    axis.title.y = element_blank()
  )

# Daily median of the drift-corrected CO2 mixing ratio. Records with composite
# QC flag 2 are set to NA first, and the series is padded to a complete
# half-hourly sequence between the first and last time stamp of df_comp.
co2_MR_median <- df_comp %>%
  mutate(co2_mixing_ratio_WLG = ifelse(as.numeric(qc_co2_mixing_ratio_composite_WLG) == 2, NA, 
                                       as.numeric(co2_mixing_ratio_WLG))) %>%
  select(DATETIME, co2_mixing_ratio_WLG, date_WLG) %>% 
  right_join(., data.frame(DATETIME = seq.POSIXt(first(.$DATETIME), last(.$DATETIME), by = 1800)),
             by = "DATETIME") %>%
  group_by(as.Date(DATETIME)) %>%
  summarise_at("co2_mixing_ratio_WLG", median, na.rm = TRUE) %>%
  rename(co2_mixing_ratio_WLG_daily = co2_mixing_ratio_WLG,
         date_WLG = `as.Date(DATETIME)`)

# Daily medians repeated 48 times, i.e. once per half-hour slot of each day.
co2_MR <- data.frame(date = rep(pull(co2_MR_median, date_WLG), each = 48),
                     value = rep(pull(co2_MR_median, co2_mixing_ratio_WLG_daily), each = 48))

# One row per row of df_comp holding the median of that row's calendar day.
# The medians are looked up by date instead of slicing co2_MR with fixed row
# numbers, so the result always has nrow(df_comp) rows and stays aligned even
# if df_comp changes length or is not a gap-free half-hourly series.
co2_mr_tmp <- data.frame(date = as.Date(df_comp$DATETIME))
co2_mr_tmp$value <- co2_MR_median$co2_mixing_ratio_WLG_daily[
  match(co2_mr_tmp$date, co2_MR_median$date_WLG)
]

# Lower panel: mixing ratio after drift correction, with the daily median
# (taken row by row from co2_mr_tmp$value) and the reference line.
p_ADC <- df_comp %>%
  mutate(
    co2_conc = co2_ref + co2_offset,
    co2_mixing_ratio_WLG = ifelse(
      as.numeric(qc_co2_mixing_ratio_composite_WLG) == 2,
      NA,
      as.numeric(co2_mixing_ratio_WLG)
    ),
    # daily median is blanked where an offset is missing or the uncorrected
    # record carries composite QC flag 2
    co2_mixing_ratio_WLG_daily = ifelse(
      is.na(co2_offset) | is.na(h2o_offset) | as.numeric(qc_co2_mixing_ratio_composite_ORG) == 2,
      NA,
      co2_mr_tmp$value
    )
  ) %>%
  ggplot(aes(x = DATETIME)) +
  geom_point(aes(y = as.numeric(co2_mixing_ratio_WLG), color = "30-min co2_mixing_ratio"), na.rm = TRUE, size = .5) +
  geom_point(aes(y = co2_mixing_ratio_WLG_daily, color = "median daily co2_mixing_ratio"), na.rm = TRUE, size = .5) +
  geom_line(aes(y = co2_ref, color = "WLG model"), na.rm = TRUE, size = 1) +
  # scale_y_continuous(limits = c(300,500)) +
  scale_x_datetime(date_breaks = "1 year", date_labels = "%Y", expand = c(.01, .01)) +
  scale_color_manual(
    values = c("#8fbbd9", "#1f78b4", alpha("#872f2f", alpha = .6)),
    labels = c("30 min", "daily median", "reference"),
    name = "Legend:"
  ) +
  labs(
    title = "After Drift Correction",
    y = expression('CO'[2]*' mixing ratio ('*mu*'mol mol'^-1*')')
  ) +
  theme_light() +
  theme(
    legend.position = "none",
    legend.key = element_blank(),
    legend.box.background = element_rect(),
    axis.title.x = element_blank(),
    text = element_text(size = 11),
    panel.background = element_rect(colour = "black", fill = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(linetype = "dashed", color = "grey70"),
    axis.title.y = element_blank()
  )

# build plot
# Before/after panels stacked, with one shared rotated y-axis title.
driftCorrectionOffset <- arrangeGrob(p_BDC, p_ADC, nrow = 2)

y.grob <- textGrob(
  expression('CO'[2]*' mixing ratio ('*mu*'mol mol'^-1*' dry air)'),
  gp = gpar(fontsize = 11),
  rot = 90
)

driftCorrectionOffset1 <- arrangeGrob(
  arrangeGrob(driftCorrectionOffset, left = y.grob)
)

# save plot
ggsave(
  filename = "~/TransTiP/_NamCo_sync/4_Texte/Alpine Steppe_ESSD/driftCorrectionOffset_CO2.png",
  plot = driftCorrectionOffset1,
  width = 18, height = 10, units = "cm", dpi = 600
)
ggsave(
  filename = "driftCorrectionOffset_CO2.png",
  plot = driftCorrectionOffset1,
  width = 18, height = 10, units = "cm", dpi = 600
)


# H2O drift correction ----------------------------------------------------
# Upper panel: daily means of the uncorrected Li7500 mixing ratio and of
# h2o_ref.
bdc_h2o <- df_comp %>%
  select(
    DATETIME,
    h2o_mixing_ratio_Li7500_ORG,
    h2o_ref,
    h2o_offset,
    qc_h2o_mixing_ratio_Li7500_composite_ORG
  ) %>%
  mutate(
    h2o_mixing_ratio_ORG = h2o_mixing_ratio_Li7500_ORG,
    h2o_conc = h2o_ref
  ) %>%
  group_by(as_date(DATETIME)) %>%
  summarise_at(vars(c("h2o_mixing_ratio_ORG", "h2o_conc")), mean, na.rm = TRUE) %>%
  pivot_longer(
    c(h2o_mixing_ratio_ORG, h2o_conc),
    names_to = "names",
    values_to = "values"
  ) %>%
  # fixed level order: h2o_conc first, so it takes the first colour/label
  mutate(names = factor(names, levels = c("h2o_conc", "h2o_mixing_ratio_ORG"))) %>%
  ggplot(aes(`as_date(DATETIME)`, values, color = names)) +
  geom_point(alpha = .5, size = .5, na.rm = TRUE) +
  guides(color = guide_legend(override.aes = list(size = 2.5, alpha = 1))) +
  scale_y_continuous(limits = c(0, 110), breaks = c(0, 25, 50, 75, 100)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", expand = c(.01, .01)) +
  labs(
    subtitle = "Before Drift Correction",
    y = expression('H'[2]*'O mixing ratio (mmol mol'^-1*')')
  ) +
  scale_color_manual(
    values = c("firebrick", "dodgerblue"),
    labels = c("Low frequency", "Eddy covariance"),
    name = "Measurement:"
  ) +
  theme_light() +
  theme(
    legend.position = c(.5, .85),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.box.background = element_blank(),
    legend.background = element_blank(),
    axis.title.x = element_blank(),
    text = element_text(size = 11),
    panel.background = element_rect(colour = "black", fill = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(linetype = "dashed", color = "grey70"),
    axis.title.y = element_blank()
  )

# Lower panel: daily means of the drift-corrected Li7500 mixing ratio and of
# h2o_ref; drawn without a legend.
adc_h2o <- df_comp %>%
  select(
    DATETIME,
    h2o_mixing_ratio_Li7500_WLG,
    h2o_ref,
    h2o_offset,
    qc_h2o_mixing_ratio_Li7500_composite_WLG
  ) %>%
  mutate(
    h2o_mixing_ratio_Li7500_WLG = h2o_mixing_ratio_Li7500_WLG,
    h2o_conc = h2o_ref
  ) %>%
  group_by(as_date(DATETIME)) %>%
  summarise_at(vars(h2o_mixing_ratio_Li7500_WLG, h2o_conc), mean, na.rm = TRUE) %>%
  pivot_longer(
    c(h2o_mixing_ratio_Li7500_WLG, h2o_conc),
    names_to = "names",
    values_to = "values"
  ) %>%
  # fixed level order: h2o_conc first, so it takes the first colour/label
  mutate(names = factor(names, levels = c("h2o_conc", "h2o_mixing_ratio_Li7500_WLG"))) %>%
  ggplot(aes(`as_date(DATETIME)`, values, color = names)) +
  geom_point(alpha = .5, size = .5, na.rm = TRUE) +
  guides(color = guide_legend(override.aes = list(size = 2.5, alpha = 1))) +
  labs(
    subtitle = "After Drift Correction",
    y = expression('H'[2]*'O mixing ratio (mmol mol'^-1*')')
  ) +
  scale_color_manual(
    values = c("firebrick", "dodgerblue"),
    labels = c("Low frequency", "Eddy covariance"),
    name = "Measurement:"
  ) +
  scale_y_continuous(limits = c(0, 22.5)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", expand = c(.01, .01)) +
  theme_light() +
  theme(
    legend.position = "none",
    legend.key = element_blank(),
    legend.box.background = element_rect(),
    axis.title.x = element_blank(),
    text = element_text(size = 11),
    panel.background = element_rect(colour = "black", fill = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(linetype = "dashed", color = "grey70"),
    axis.title.y = element_blank()
  )

# Before/after panels stacked, with one shared rotated y-axis title.
driftCorrectionOffsetH2O <- arrangeGrob(bdc_h2o, adc_h2o, nrow = 2)

y.grob <- textGrob(
  expression('H'[2]*'O mixing ratio (mmol mol'^-1*' dry air)'),
  gp = gpar(fontsize = 11),
  rot = 90
)

driftCorrectionOffsetH2O_comb <- arrangeGrob(
  arrangeGrob(driftCorrectionOffsetH2O, left = y.grob)
)

ggsave(
  filename = "~/TransTiP/_NamCo_sync/4_Texte/Alpine Steppe_ESSD/driftCorrectionOffset_H2O.png",
  plot = driftCorrectionOffsetH2O_comb,
  width = 18, height = 10, units = "cm", dpi = 600
)
ggsave(
  filename = "driftCorrectionOffset_H2O.png",
  plot = driftCorrectionOffsetH2O_comb,
  width = 18, height = 10, units = "cm", dpi = 600
)


# Gap filling uncertainty -------------------------------------------------
# number of filled gaps
# (non-missing gap-filled values minus non-missing original values)
sum(!is.na(df$NEE_f)) - sum(!is.na(df$NEE_orig))

# plot to inspect, how the uncertainty scales with the flux magnitude.
plot(NEE_fsd ~ NEE_fall, data = df)

# uncertainty estimation considering autocorrelation
# resid: original minus modelled NEE, defined only where NEE_fqc == 0
results <- mutate(
  df,
  resid = ifelse(NEE_fqc == 0, NEE_orig - NEE_fall, NA)
)

acf(results$resid, na.action = na.pass, main = "", lag.max = 100)

# Computation of effective number of observations
autoCorr <- computeEffectiveAutoCorr(results$resid)
nEff <- computeEffectiveNumObs(results$resid, na.rm = TRUE)
c(nEff = nEff, nObs = sum(is.finite(results$resid)))

# Mean NEE and RMSE of the residuals, once divided by the effective number of
# observations (nEff) and once by the number of finite NEE_fsd values (nRec)
results %>%
  filter(NEE_fqc == 0) %>%
  summarise(
    nRec = sum(is.finite(NEE_fsd)),
    NEEMean = mean(NEE_f),
    NEE_RMSE_nEff = sqrt(sum(resid^2, na.rm = TRUE) / nEff),
    NEE_RMSE_nObs = sqrt(sum(resid^2, na.rm = TRUE) / nRec)
  ) %>%
  select(NEEMean, NEE_RMSE_nEff, NEE_RMSE_nObs)

# Complete cases with NEE_fqc == 0, used to compare NEE_f against NEE_fall
temp <- results %>%
  filter(NEE_fqc == 0) %>%
  select(DATETIME, NEE_f, NEE_fall, resid) %>%
  na.omit()

ggplot(temp, aes(NEE_f, NEE_fall)) +
  geom_point()

with(temp, cor(NEE_f, NEE_fall))

# windroses using openair ---------------------------------------------------------------
# Wind data without the year 2010, split into the periods before and after it.
df_wind <- df %>%
  select(DATETIME, wind_speed, wind_dir, TKE) %>%
  rename(date = DATETIME) %>%
  filter(year(date) != 2010) %>%
  mutate(
    CP = factor(
      ifelse(year(date) < 2010, "Before 2010", "After 2010"),
      levels = c("Before 2010", "After 2010")
    )
  )

# Generate color gradients
cols_WS <- cmocean("amp", start = .1, end = 1)
cols_TKE <- cmocean("thermal", start = .15, end = 1)

# WS
# One rose per period (side by side), 5 degree sectors, six colour classes.
WR_EC_WS <- windRose(
  df_wind,
  ws = "wind_speed",
  wd = "wind_dir",
  angle = 5,
  type = "CP",
  paddle = FALSE,
  cols = cols_WS(6),
  grid.line = 2,
  key = list(labels = c("< 2", "2 - 4", "4 - 6", "6 - 8", "8 - 10", "> 10")),
  strip.left = FALSE,
  strip = TRUE,
  breaks = 6,
  offset = 0,
  dig.lab = 2,
  annotate = FALSE,
  key.position = "right",
  layout = c(2, 1),
  main = "wind speed"
)

# TKE
# Same layout with TKE in place of wind speed and a reversed colour ramp.
WR_EC_TKE <- windRose(
  df_wind,
  ws = "TKE",
  wd = "wind_dir",
  angle = 5,
  type = "CP",
  paddle = FALSE,
  cols = rev(cols_TKE(6)),
  key.footer = expression('(m'^2*'s'^-2*')'),
  key = list(labels = c("< 1", "1 - 2", "2 - 3", "3 - 4", "4 - 5", "> 5")),
  grid.line = 2,
  strip.left = FALSE,
  strip = TRUE,
  breaks = c(0, 1, 2, 3, 4, 5),
  offset = 0,
  dig.lab = 2,
  annotate = FALSE,
  key.position = "right",
  layout = c(2, 1),
  main = "TKE"
)

# export
# The roses are drawn with an explicit print(): a bare object name is only
# rendered through top-level auto-printing, which does not happen when the
# script is run via source(), leaving the PNG files empty.
png("WindRose_EC_WS.png", width = 8.3, height = 5, units = "cm", res = 300)
print(WR_EC_WS)
dev.off()

png("WindRose_EC_TKE.png", width = 8.3, height = 5, units = "cm", res = 300)
print(WR_EC_TKE)
dev.off()

# CO2 efflux when snow ?  -------------------------------------------------
# Days whose mean albedo (SWout / SWin, using only records with Rg > 50 and
# albedo below 1) exceeds 0.8.
df_Albedo <- df_SSH %>%
  filter(Rg > 50) %>%
  mutate(Albedo = SWout / SWin) %>%
  filter(Albedo < 1) %>%
  group_by(as_date(DATETIME)) %>%
  summarise_at(.vars = vars(Albedo), ~mean(., na.rm = TRUE)) %>%
  filter(Albedo > 0.8)

# quick visual check of the selected days
ggplot(df_Albedo, aes(`as_date(DATETIME)`, Albedo)) +
  geom_point() +
  scale_y_continuous(limits = c(0, 1))

# Number of non-missing F_CO2 records on those days with Ta below 273.15
N_Obs <- df %>%
  filter(as_date(DATETIME) %in% df_Albedo$`as_date(DATETIME)` & Ta < 273.15) %>%
  summarise_at(vars(F_CO2), ~sum(!is.na(.))) %>%
  pull() %>%
  unname()

# Mean diurnal course of the three flux series on high-albedo days with air
# temperature below 0 degrees C; the count of non-missing F_CO2 values per
# time of day is printed along y = -0.4.
df %>%
  mutate(Ts = Ts - 273.15,
         Ta = Ta - 273.15) %>%
  filter(as_date(DATETIME) %in% df_Albedo$`as_date(DATETIME)` & Ta < 0) %>%
  select(DATETIME, F_CO2, F_CO2_HC_Burba, F_CO2_HC_Frank) %>%
  # summarise_at(vars(F_CO2), ~sum(is.na(.)==F))
  # decimal hour of day, taken from the time components; slicing
  # as.character(DATETIME) can yield NA for midnight stamps whose string form
  # has no time part
  mutate(TIME_DEC = lubridate::hour(DATETIME) + lubridate::minute(DATETIME) / 60) %>%
  group_by(TIME_DEC) %>%
  # yields <column>_mean and <column>_sum (count of non-missing values)
  summarise_at(vars(F_CO2, F_CO2_HC_Burba, F_CO2_HC_Frank), .funs = c(~mean(., na.rm = TRUE), ~sum(!is.na(.)))) %>%
  pivot_longer(cols = c(F_CO2_mean, F_CO2_HC_Burba_mean, F_CO2_HC_Frank_mean), values_to = "values", names_to = "names") %>%
  ggplot(aes(TIME_DEC, values, color = names))+
  geom_hline(yintercept = 0, color = "grey") +
  geom_line() +
  geom_text(aes(x = TIME_DEC, y = -0.4, label = F_CO2_sum), size = 3, color = "black") +
  annotate(geom = "text", x = 3, y = -0.3, size = 4, label = paste("Number of observations:", N_Obs, sep = " ")) +
  scale_x_continuous(breaks = c(0,6,12,18,24), labels = c(0,6,12,18,24)) +
  labs(title = "Diurnal course with high albedo", y = expression('Mean CO'[2]*' flux ('*mu*'mol m'^-2*'s'^-1*')'), x = "Hour of the Day") +
  scale_color_manual(values = c("firebrick", "dodgerblue", "black"), labels = c("Burba et al. (2008)", "Frank & Massman (2020)", "Uncorrected")) +
  theme_cowplot() +
  theme(legend.title = element_blank(), legend.position = c(.05,.8))

# No plot object is passed, so ggsave() writes the most recent ggplot.
ggsave("Diurnal_course_with_high_albedo.png", dpi = 300)
ggsave("~/TransTiP/_NamCo_sync/4_Texte/Alpine Steppe_ESSD/Diurnal_course_with_high_albedo.png", dpi = 300)


# Winter air and surface temperatures -------------------------------------
# Mean diurnal course of Ta and Ts (shifted by -273.15) for every month, one
# facet per month.
plot_air_surf_temp <- df %>%
  select(DATETIME, Ta, Ts) %>%
  mutate(
    # decimal hour of day, taken from the time components; slicing
    # as.character(DATETIME) can yield NA for midnight stamps whose string
    # form has no time part
    TIME_DEC = lubridate::hour(DATETIME) + lubridate::minute(DATETIME) / 60,
    Ta = Ta - 273.15,
    Ts = Ts - 273.15) %>%
  group_by(month(DATETIME), TIME_DEC) %>%
  summarise_at(vars(Ta, Ts), mean, na.rm = TRUE) %>%
  pivot_longer(cols = c(Ta, Ts), values_to = "values", names_to = "names") %>%
  ggplot(aes(TIME_DEC, values, color = names))+
  geom_hline(yintercept = 0, color = "grey") +
  geom_line() +
  scale_x_continuous(breaks = c(0,6,12,18,24), labels = c(0,6,12,18,24)) +
  scale_color_manual(values = c("dodgerblue", "firebrick"), labels = c("Air Temperature", "Soil Temperature")) +
  # facet strips show month names via the month_name lookup vector
  facet_wrap(~`month(DATETIME)`, ncol = 3, labeller = labeller(`month(DATETIME)` = month_name)) +
  labs(y = "Temperature (°C)", x = "Hour of the Day") +
  theme_light() +
  theme(legend.title = element_blank(), legend.position = "top", legend.justification = c("left", "center"),
        panel.background = element_rect(colour = "black", fill = NA), panel.grid.minor = element_blank(),
        panel.grid.major = element_line(linetype = "dashed", color = "grey70"), panel.border = element_rect(colour = "black", fill = NA),
        strip.background = element_rect(colour = "black", fill = NA), strip.text = element_text(colour = 'black'), 
        strip.text.x = element_text(margin = margin(.1, 0, .1, 0, "cm")))

# The working-directory copy is written at the size of the current device.
ggsave("Diurnal_course_air_soil_temp.png", plot_air_surf_temp, dpi = 300)
ggsave("~/TransTiP/_NamCo_sync/4_Texte/Alpine Steppe_ESSD/Diurnal_course_air_soil_temp.png", plot_air_surf_temp, width = 12, height = 12, units = "cm", dpi = 600)

